*************************
/* GETTING THE DATASET */
*************************
* Build the pooled person-level file: the May CPS rows for 1973-1978 stacked
* on top of the ORG file, restricted to ages 16-65, with area identifiers
* filled in from the crosswalk.
clear all
set more off

global folder /Users/yasenov/Dropbox/mariel_boatlift/revision_version/revision_data/

* File paths are quoted so they keep working if the folder ever has a space.
use "$folder/raw_may_cps.dta"
keep if inrange(year, 1973, 1978)
* Rescale the May CPS earnings weight by 100. This runs before the append, so
* only the May CPS rows are affected.
* NOTE(review): presumably puts earnwt on the same scale as the ORG file -- confirm.
replace earnwt = earnwt / 100

* Keep only areas whose first observed year is 1973 and last is 1978.
bysort smsarank (year): gen first_year = year[1]
by smsarank: gen last_year = year[_N]
keep if first_year == 1973 & last_year == 1978
drop first_year last_year

append using "$folder/raw_org_cps_mariel.dta"

* The varlist ends on its last variable. The earlier version ended with a
* line continuation onto a blank line, which would have swallowed the next
* command if that blank line were ever removed.
keep year smsarank age sex race ethnic earnhre earnhr unemp gradeat gradecp ///
	pmsarank msafips cmsacode whitecol bluecol union cmsarank manuf class ///
	classer esr in_lf lfsr89 ftpt79 ftpt89 earnwt earnwke uhourse weight

keep if inrange(age, 16, 65)

* Three passes over the same crosswalk with different keys. The update option
* only fills values that are missing in memory; it never overwrites.
merge m:1 cmsarank pmsarank using "$folder/crosswalk.dta", nogen update
merge m:1 cmsarank msafips using "$folder/crosswalk.dta", nogen update
* NOTE(review): the Stata manual advises against many-to-many merges because
* rows are paired by position within each key. Kept as written because the
* crosswalk is not visible here -- confirm it does what is intended.
merge m:m cmsarank using "$folder/crosswalk.dta", nogen update   // no updates are made for cmsa's with many smsa areas.

* Drop rows without a usable area code (zero or any missing value) and rows
* with a negative weight.
drop if smsarank == 0 | missing(smsarank)
drop if earnwt < 0 | weight < 0

***************************
/* DEMOGRAPHIC VARIABLES */
***************************
* Person-level 0/1 indicators built from education, sex, race, ethnicity and age.
* Stata treats a missing value as larger than any number, so comparisons such
* as >= and != are true for missing inputs. The indicators below that use
* those operators carry an explicit missing() guard, so a missing input gives
* 0, the same as every other indicator in this section.

* Education. NOTE(review): gradecp == 2 presumably means the highest grade
* attended was not completed -- confirm against the codebook.
gen low_skilled = gradeat < 12
replace low_skilled = 1 if gradeat == 12 & gradecp == 2
* Without the guard, rows with missing gradeat were flagged as college.
gen college = gradeat >= 16 & !missing(gradeat)
replace college = 0 if gradeat == 16 & gradecp == 2

gen male = sex == 1

* Race and ethnicity. Code 8 of ethnic is used as the non-Hispanic category
* and code 5 as Cuban.
gen white = (race == 1 & ethnic == 8)
gen black = (race == 2 & ethnic == 8)
* Without the guard, rows with missing ethnic were flagged as Hispanic.
gen hisp = (ethnic != 8 & ethnic != 5 & !missing(ethnic))
gen cuban = (ethnic == 5)

gen young = age < 30

* NOTE(review): unlike hisp, this indicator does not exclude Cubans -- confirm
* that the difference is intended.
gen hisp_lowskilled = (ethnic != 8 & !missing(ethnic) & low_skilled == 1)
gen cuban_lowskilled = (cuban == 1 & low_skilled == 1)

****************************
/* LABOR MARKET VARIABLES */
****************************
* Hourly earnings implied by weekly earnings and usual weekly hours.
gen uearnhre = earnwke / uhourse

* Multiply both earnings measures by a year-specific factor, then take logs.
* The factor for 1980 is 1; rows from years outside 1973-1991 are untouched.
* NOTE(review): the factors look like a price deflator to 1980 dollars --
* confirm the source series.
global vars uearnhre earnwke

local yrs  1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 ///
	1983 1984 1985 1986 1987 1988 1989 1990 1991
local mult 1.854428813 1.67047874 1.530571296 1.447016669 1.359074187 ///
	1.26272953 1.135017842 1 0.905974729 0.853418002 ///
	0.827279757 0.792654883 0.765641264 0.751039274 0.725093956 ///
	0.69653773 0.664689936 0.630523963 0.605014431

* The two lists are matched by position: the k-th factor belongs to the k-th year.
local npairs : word count `yrs'
forvalues k = 1/`npairs' {
	local yr : word `k' of `yrs'
	local m : word `k' of `mult'
	foreach v of global vars {
		replace `v' = `v' * `m' if year == `yr'
	}
}

foreach v of global vars {
	gen log`v' = log(`v')
}

* Rebuild unemp from scratch using the two status variables. The order of
* the replaces matters: a later line overrides an earlier one when a row
* matches both.
* NOTE(review): the meaning of the individual codes, and of the 0.5 value,
* is not visible here -- confirm against the codebook.
replace unemp = .
replace unemp = 1   if ftpt79 == 3 | ftpt89 == 6
replace unemp = 0.5 if ftpt79 == 5 | ftpt89 == 7
replace unemp = 0   if inlist(ftpt79, 1, 2, 4) | inrange(ftpt89, 2, 5)

* Run the labelling script, order the data, drop the inputs that are no
* longer needed and save the person-level file.
* The path is now wrapped in a matched pair of quotes; the earlier line had
* only a trailing quote, which left the filename with an unbalanced quote.
do "$folder/smsalabels.do"
* NOTE(review): smsa is either created by the labelling script or is an
* abbreviation of smsarank -- confirm which one is meant.
sort smsa year

* NOTE(review): metarea is not in the earlier keep list, so it presumably
* comes from the crosswalk or the labelling script -- confirm.
drop gradeat gradecp uhourse earnhre earnhr cmsarank pmsarank msafips cmsacode metarea
saveold "$folder/aux_may-org.dta", replace

*********************************
/* AREA-BY-YEAR CONTROL MEANS  */
*********************************
* Collapse the person-level data to one row per year and smsarank, holding
* the weighted mean of each control variable, and save the result.
rename unemp unemp_whole_lf

* The list is built in three steps; the resulting order of variables is
* unchanged.
global vars low_skilled college male age white black hisp cuban
global vars $vars whitecol bluecol union manuf
global vars $vars hisp_lowskilled cuban_lowskilled unemp_whole_lf

collapse (mean) $vars [pweight = weight], by(year smsarank)
saveold $folder/msa_controls_may-org.dta, replace
